home *** CD-ROM | disk | FTP | other *** search
- page 132,66,2,2
- opt nomd,nomex,nocex,loc,mu
- ;*******************************************
- ;Motorola Austin DSP Operation June 30,1988
- ;*******************************************
- ;DSP56000/1
- ;Memory to Memory FFT - 1024 point
- ;File name: C-56.asm
- ;**************************************************************************
- ; Maximum sample rate: 3.231 ms at 20.5 MHZ/ 2.453 ms at 27.0 MHz
- ; Memory Size: Prog: 137 words ; Data: 6656 words
- ; Number of clock cycles: 66240 (33120 instruction cycles)
- ; Clock Frequency: 20.5MHz/27.0MHz
- ; Instruction cycle time: 97.5ns / 74.1ns
- ;**************************************************************************
- ;
- ; Updated to use faster FFT method by overlapping the butterfly
- ;
- ; Main program to verify the operation of the DSP56000 Radix 2
- ; complex 1024 point FFT using the
- ; DSP56000/1's 56 bit arithmetic, 24 bit data storage and
- ; 24 bit coefficient storage.
- ;
- ; 1024 point complex FFT
- ; External data starts at address $400
- ; Internal data starts at address 0
- ; Coefficient table starts at address $800
- ;
- ;**************************************************************************
- ;
- include 'sinegen'
- include 'sincos'
- include 'wbh4m'
- include 'magsqr'
- include 'sqrt3'
- ;
- testr2f ident 1,0
-
- reset equ 0
- start equ $100
- points equ 1024
- outdata equ $1000
- data equ $400
- coef equ $800
- window equ $C00
- mag1 equ @pow(2.0,-8.0)
- mag2 equ @pow(2.0,-8.0)
- ;
- ; Generate two tone test input.
- ;
- org x:data
- sinegen points,mag1,0.16666,0.0
- org y:data
- sinegen points,mag2,0.235,0.0
- ;
- ; Generate FFT coefficients (twiddle factors).
- ;
- sincos points,coef
- ;
- ; Generate Blackman-Harris 4 Term Minimum Sidelobe Window.
- ;
- org y:window
- wbh4m points
- ;
- ; Program starts here.
- ;
- opt mex
- org p:reset
- jmp start
- org p:start
- ;
- ; Window data with Blackman-Harris 4 Term Minimum Sidelobe Window.
- ;
- move #data,r0
- move #window,r4
- move #-1,m0 ;linear addressing
- move m0,m4
- do #points,end_wdw
- move x:(r0),x1 y:(r4)+,y0
- mpyr x1,y0,a y:(r0),x0
- mpyr x0,y0,b a,x:(r0)
- move b,y:(r0)+
- end_wdw
- ;
- ; Do 1024 point complex FFT.
- ;
- jsr $100
- ;
- ; Calculate magnitude of FFT.
- ;
- move #outdata,r0
- move #-1,m0 ;linear addressing
- do #points,end_mag
- move l:(r0),x
- magsqr
- move a,l:(r0)
- move l:(r0),y
- sqrt3
- move b,x:(r0)+
- end_mag
- ;write linear data to x memory
-
- nop
- nop
- nop
- swi
-
- page 132,60,1,1
- opt nomd,mex
- fftr2fn macro data,coef,odata
- fftr2fn ident 1,0
- ;
- ; 1024 Point Complex Fast Fourier Transform Routine
- ;
- ; This routine performs a 1024 point complex FFT on external data
- ; using the Radix 2, Decimation in Time, Cooley-Tukey FFT algorithm.
- ;
- ; Complex input and output data
- ; Real data in X memory
- ; Imaginary data in Y memory
- ; Normally ordered input data
- ; Normally ordered output data
- ; Coefficient lookup table
- ; -Cosine values in X memory
- ; -Sine values in Y memory
- ;
- ; Macro Call - fftr2fn data,coef,odata
- ;
- ; data start of external data buffer
- ; odata start of external output data buffer
- ; coef start of sine/cosine table
- ;
- ; Radix 2, Decimation In Time Cooley-Tukey FFT algorithm
- ; ___________
- ; | |
- ; ar,ai ---->| Radix 2 |----> ar',ai'
- ; br,bi ---->| Butterfly |----> br',bi'
- ; |___________|
- ; ^
- ; |
- ; wr,wi
- ;
- ; ar' = ar + wr*br - wi*bi
- ; ai' = ai + wi*br + wr*bi
- ; br' = ar - wr*br + wi*bi = 2*ar - ar'
- ; bi' = ai - wi*br - wr*bi = 2*ai - ai'
- ;
- ; Address pointers are organized as follows:
- ;
- ; r0 = ar,ai input pointer n0 = group offset m0 = modulo (points)
- ; r1 = br,bi input pointer n1 = group offset m1 = modulo (points)
- ; r2 = ext. data base address n2 = groups per pass m2 = 256 point fft counter
- ; r3 = coef. offset each pass n3 = coefficient base address m3 = linear
- ; r4 = ar',ai' output pointer n4 = group offset m4 = modulo (points)
- ; r5 = br',bi' output pointer n5 = group offset m5 = modulo (points)
- ; r6 = wr,wi input pointer n6 = coef. offset m6 = bit reversed
- ; r7 = not used (*) n7 = output pointer storage m7 = not used (*)
- ;
- ; * - r7 and m7 are typically reserved for a user stack pointer.
- ;
- ; Alters Data ALU Registers
- ; x1 x0 y1 y0
- ; a2 a1 a0 a
- ; b2 b1 b0 b
- ;
- ; Alters Address Registers
- ; r0 n0 m0
- ; r1 n1 m1
- ; r2 n2 m2
- ; r3 n3 m3
- ; r4 n4 m4
- ; r5 n5 m5
- ; r6 n6 m6
- ; n7
- ; Alters Program Control Registers
- ; pc sr
- ;
- ; Uses 8 locations on System Stack
- ;
- ;
- _points equ 1024 ;number of FFT points
- _intdata equ 0 ;address of internal data workspace
- move #data,r2 ;initialize input pointers
- move r2,r0
- move #_points/4,n0 ;initialize butterflies per group
- move n0,n4 ;initialize pointer offsets
- move n0,n6 ;initialize coefficient offset
- move #coef,n3 ;initialize coefficient base address
- move #_points-1,m0 ;initialize address modifiers
- move m0,m1 ;for modulo(points) addressing
- move m0,m4
- move m0,m5
- move #-1,m3 ;linear addressing for coefficient base offset
- move #0,m2 ;initialize 256 point fft counter
- move m2,m6 ;initialize coefficient address modifier
- ;for reverse carry (bit reversed) addressing
- ;
- ; Do first and second Radix 2 FFT passes as four-point butterflies
- ;
- move x:(r0)+n0,x0 ;initialize butterfly
- tfr x0,a x:(r0)+n0,y1 ;
-
- do n0,_twopass
- tfr y1,b x:(r0)+n0,y0
- add y0,a x:(r0),x1 ;ar+cr
- add x1,b r0,r4 ;br+dr
- add a,b (r0)+n0 ;ar'=(ar+cr)+(br+dr)
- subl b,a b,x:(r0)+n0 ;br'=(ar+cr)-(br+dr)
- tfr x0,a a,x0 y:(r0),b
- sub y0,a y:(r4)+n4,y0 ;ar-cr
- sub y0,b x0,x:(r0) ;bi-di
- add a,b y:(r0)+n0,x0 ;cr'=(ar-cr)+(bi-di)
- subl b,a b,x:(r0) ;dr'=(ar-cr)-(bi-di)
- tfr x0,a a,x0 y:(r4),b
- add y0,a y:(r0)+n0,y0 ;bi+di
- add y0,b x0,x:(r0)+n0 ;ai+ci
- add b,a y:(r0)+,x0 ;ai'=(ai+ci)+(bi+di)
- subl a,b a,y:(r4)+n4 ;bi'=(ai+ci)-(bi+di)
- tfr x0,a b,y:(r4)+n4
- sub y0,a x1,b ;ai-ci
- sub y1,b x:(r0)+n0,x0 ;dr-br
- add a,b x:(r0)+n0,y1 ;ci'=(ai-ci)+(dr-br)
- subl b,a b,y:(r4)+n4 ;di'=(ai-ci)-(dr-br)
- tfr x0,a a,y:(r4)+
- _twopass
- ;
- ; Do remaining 8 FFT passes as four 256 point Radix 2 FFT's using internal data
- ; and external coefficients.
- ;
- ; Each 256 point Radix 2 FFT consists of 8 passes. The first pass uses external
- ; input data and internal output data to move the data on-chip. Intermediate
- ; passes use internal input data and internal output data to keep the data
- ; on-chip. The last pass uses internal input data and external output data
- ; to move the data off-chip.
- ;
- move #odata,r4 ;load output data address
- do #4,_end_fft ;do 256 point fft four times
- move r4,ssh ;push output data address onto stack
- move r2,r0 ;get external data input address for first pass
- move m2,r3 ;update coefficient offset
- move #256/2,n1 ;initialize butterflies per group
- move #1,n2 ;initialize groups per pass
-
- do #7,_end_pass ;do first 7 passes out of 8
- move #_intdata,r4 ;initialize A output pointer
-
- move n1,r5
- move n1,n0 ;initialize pointer offsets
- lua (r5)-,n7
-
- move n1,n4
- move n1,r6
- lua (r0)+n0,r1 ;initialize B input pointer
- lua (r4)+n4,r5 ;initialize B output pointer
-
- lua (r6)+,n4
- move n4,n5
- lua (r3)+n3,r6 ;initialize W input pointer
- move n4,n0
- ;
- ; initialize butterfly
- move x:(r1),x1 y:(r6),y0 ;lookup -sine value
- move y:(r0),b
- mac x1,y0,b x:(r6)+n6,x0 y:(r1)+,y1
- macr -x0,y1,b y:(r0),a ;
-
- do n2,_end_grp
-
- do n7,_end_bfy
- subl b,a x:(r0),b b,y:(r4)
- mac -x1,x0,b x:(r0)+,a a,y:(r5)
- macr -y1,y0,b x:(r1),x1
- subl b,a b,x:(r4)+ y:(r0),b
- mac x1,y0,b y:(r1)+,y1 ;Radix 2 DIT butterfly kernel
- macr -x0,y1,b a,x:(r5)+ y:(r0),a ;with constant twiddle factor
- _end_bfy
- move (r1)+n1
- subl b,a x:(r0),b b,y:(r4)
- mac -x1,x0,b x:(r0)+n0,a a,y:(r5)
- macr -y1,y0,b x:(r1),x1 y:(r6),y0 ;lookup -sine value
- subl b,a b,x:(r4)+n4 y:(r0),b
- mac x1,y0,b x:(r6)+n6,x0 y:(r1)+,y1
- macr -x0,y1,b a,x:(r5)+n5 y:(r0),a ;
-
- _end_grp
- move n1,b1
- lsr b n2,a1 ;divide butterflies per group by two
- lsl a b1,n1 ;multiply groups per pass by two
- move r3,b1
- lsr b a1,n2 ;divide coefficient offset by two
- move b1,r3
- move #_intdata,r0 ;intermediate passes use internal input data
- _end_pass
- ;
- ; Do last FFT pass and move output data off-chip to external data memory.
- ;
- move n7,r1
- move ssh,r4
- move (r1)+
-
- move r1,n0
- move r1,n1 ;correct pointer offset for last pass
- move r1,n4
- move r1,n5
-
- move #_points/4,n4 ;offset for output pointer A
- lua (r0)+,r1 ;initialize B input pointer
- lua (r4)+n4,r5 ;initialize B output pointer, first step
- move n4,n5 ;offset for output pointer B
- lua (r3)+n3,r6 ;initialize W input pointer
- move (r5)+n5 ;initialize B output pointer, second step
- move #0,m4 ;bit-reversed addressing for output pointer A
- move m4,m5 ;bit-reversed addressing for output pointer B
- move x:(r1),x1 y:(r6),y0
- move (r5)-n5 ;predecrement output pointer A
- move x:(r5),a y:(r0),b
-
- do n2,_lastpass
- mac x1,y0,b x:(r6)+n6,x0 y:(r1)+n1,y1 ;Radix 2 DIT butterfly kernel
- macr -x0,y1,b a,x:(r5)+n5 y:(r0),a ;with one butterfly per group
- subl b,a x:(r0),b b,y:(r4) ;and changing twiddle factor
- mac -x1,x0,b x:(r0)+n0,a a,y:(r5)
- macr -y1,y0,b x:(r1),x1 y:(r6),y0
- subl b,a b,x:(r4)+n4 y:(r0),b
- _lastpass
- move a,x:(r5)+n5 ;save last real result of these 256
- move m0,m4 ;reinitialize pointers for modulo addr.
- move m0,m5 ;
- move m2,r6 ;get fft counter
- move n6,n2 ;get fft data input offset
- move m3,m2 ;external data pointer uses linear arithmetic
- move (r6)+n6 ;increment fft counter (bit reversed)
- move (r2)+n2 ;point to next 256 point fft input data
- move r6,m2 ;save fft counter
- _end_fft
- endm
-
- org p:$100
- fftr2fn $400,$800,$1000
- rts
- end
-
- ə
-